import selenium  #This is equivalent to the R command library(selenium)
from selenium import webdriver #Importing the function we need
import re
from collections import defaultdict
from bs4 import BeautifulSoup
import pandas as pd
import time
import os
import json


# Run this file from inside the script directory. The working directory is
# moved to the path obtained by dropping the last few characters of the
# current one, so the relative 'Data/...' paths used further down resolve
# from there.
# NOTE(review): the 7 presumably matches the length of the script folder's
# name (with or without its separator) -- confirm before renaming the folder.
chars_to_strip = 7

cwd = os.getcwd()
path = cwd[:-chars_to_strip]
os.chdir(path)


# Scrape the raw text of the Supreme Court docket page for every docket whose
# `term` is strictly between 16 and 25, then save the result as a JSON object
# mapping docket number -> page text.
# NOTE(review): `term` is presumably the two-digit term year (the output file
# is labelled 2017_2024) -- confirm against the spreadsheet.
data = pd.read_excel('Data/Processed_Data/Docket_Day_Action.xlsx')
data = data[(data['term'] > 16) & (data['term'] < 25)]

link_start = 'https://www.supremecourt.gov/search.aspx?filename=/docket/docketfiles/html/public/'
driver = webdriver.Chrome()  # Load our browser
dockets = {}
try:
    for docket in data['cleaned_docket']:
        # Convert to str BEFORE the duplicate check: the dict keys are
        # strings, so testing the raw cell value could miss a docket that
        # was already scraped and fetch it a second time.
        docket = str(docket)
        if docket in dockets:
            continue
        time.sleep(1)  # pause before each real page request
        # Docket numbers may contain an en dash; the link is built with an
        # ASCII hyphen instead, wherever the dash appears in the string.
        link = link_start + docket.replace('–', '-') + '.html'
        driver.get(link)
        page_html = driver.page_source
        soup = BeautifulSoup(page_html, 'html.parser')
        # Keyed by the docket string as it appears in the data (dash unchanged).
        dockets[docket] = soup.get_text()
finally:
    # Close the browser even when a page load or parse raises.
    driver.quit()

with open('Data/Processed_Data/Lower_Court_Dockets/Supreme_Court_Dockets_Raw_Text_2017_2024.json', 'w') as f:
    json.dump(dockets, f, indent=4)


# Scrape the raw text of the Supreme Court docket page for every docket whose
# `term` is greater than 0 and at most 16, then save the result as a JSON
# object mapping docket number -> page text. These pages live under the
# '/docketfiles/' path and use the '.htm' extension.
# NOTE(review): `term` is presumably the two-digit term year (the output file
# is labelled 2001_2016) -- confirm against the spreadsheet.
data = pd.read_excel('Data/Processed_Data/Docket_Day_Action.xlsx')
data = data[(data['term'] > 0) & (data['term'] <= 16)]

link_start = 'https://www.supremecourt.gov/search.aspx?filename=/docketfiles/'
driver = webdriver.Chrome()  # Load our browser
dockets = {}
try:
    for docket in data['cleaned_docket']:
        docket = str(docket)
        if docket in dockets:
            continue
        # Sleep only when a page is actually requested; rows repeating an
        # already scraped docket no longer cost a second each.
        time.sleep(1)
        # Docket numbers may contain an en dash; the link is built with an
        # ASCII hyphen instead, wherever the dash appears in the string.
        link = link_start + docket.replace('–', '-') + '.htm'
        driver.get(link)
        page_html = driver.page_source
        soup = BeautifulSoup(page_html, 'html.parser')
        # Keyed by the docket string as it appears in the data (dash unchanged).
        dockets[docket] = soup.get_text()
finally:
    # Close the browser even when a page load or parse raises.
    driver.quit()

with open('Data/Processed_Data/Lower_Court_Dockets/Supreme_Court_Dockets_Raw_Text_2001_2016.json', 'w') as f:
    json.dump(dockets, f, indent=4)